Intro to ggplot2

Grammar of graphics

He’s going to push his file to GitHub for us to reference, so we don't need to copy it.

Start with a ggplot, then add: data and aesthetic mappings, geometric objects, scales, facet specifications, statistical transformations, and a coordinate system.

# Let's get the data ready

# Load the gapminder data set into the workspace.
data(list = "gapminder")
# Inspect the structure: 6 variables - 2 factors (country, continent),
# integer year and pop, numeric lifeExp and gdpPercap.
str(object = gapminder)
## Classes 'tbl_df', 'tbl' and 'data.frame':    1704 obs. of  6 variables:
##  $ country  : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ year     : int  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ lifeExp  : num  28.8 30.3 32 34 36.1 ...
##  $ pop      : int  8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
##  $ gdpPercap: num  779 821 853 836 740 ...
# ggplot2::<name> references the package without calling library().

# Data and aesthetic mapping declared once in ggplot(); geom_point() then
# puts in some points.
ggplot(gapminder, aes(year, pop)) +
  geom_point()

# Data and mapping declared on the layer instead. This is useful for layering
# two datasets: each layer can use a smaller subset of the data.
ggplot() +
  geom_point(aes(year, pop), gapminder)

# Same layer-level form, with continent mapped to colour.
ggplot() +
  geom_point(aes(year, pop, colour = continent), gapminder)

# Same mapping again, this time declared in ggplot() itself.
ggplot(gapminder, aes(year, pop, colour = continent)) +
  geom_point()

# Aggregate by continent

# Total population per (year, continent), plotted over time.
# pop is stored as an integer, and some of the totals are too large for an
# integer: sum(pop) overflowed to NA with an "integer overflow" warning and
# 8 rows were then dropped from the plot. Summing as double avoids that.
gapminder %>%
  group_by(year, continent) %>%
  summarise(pop = sum(as.numeric(pop))) %>%
  ggplot(data = ., mapping = aes(x = year, y = pop, color = continent)) +
  geom_point() # our scale here is way off, not very useful to show

# Make a transformation to the y axis - a log scale

# Total population per (year, continent) on a log10 y axis.
# pop is stored as an integer, and some of the totals are too large for an
# integer: sum(pop) overflowed to NA with an "integer overflow" warning and
# 8 rows were then dropped from the plot. Summing as double avoids that.
gapminder %>%
  group_by(year, continent) %>%
  summarise(pop = sum(as.numeric(pop))) %>%
  ggplot(data = ., mapping = aes(x = year, y = pop, color = continent)) +
  geom_point() +
  scale_y_log10() # now we can see Oceania, but at the cost of other things

# Look at the raw data.
# NOTE(review): lower-case view() is presumably tibble::view() (base R only
# has utils::View) - confirm the package providing it is attached.
view(gapminder)
# Per-column summary: level counts for the factors (country, continent) and
# min / quartiles / mean / max for the numeric columns.
summary(gapminder)
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 

Looking at other variables - GDP per capita

# GDP per capita over time, one colour per country and one panel per
# continent. Faceting: facet_grid() = two variables, facet_wrap() = only one.
ggplot(gapminder, aes(year, gdpPercap, colour = country)) +
  facet_wrap(~continent) +
  geom_point()

# This got a lot nuttier - we want to filter to Asia only

# Same plot restricted to the rows where continent is "Asia".
gapminder %>%
  filter(continent == "Asia") %>%
  ggplot(aes(year, gdpPercap, colour = country)) +
  facet_wrap(~continent) +
  geom_point()

# Try two numeric values

# Two numeric values - there is an interesting, kind of weird relationship;
# we'll try the log next.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point()

# With a log10 x axis this is a lot easier to read - an example of when you
# would want the transformation. Next, check it out by continent.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  scale_x_log10() +
  geom_point()

# Better yet: colour by continent and add a smoother line. The default here
# is the loess smoother (local regressions); we can change that if we want.
ggplot(gapminder, aes(gdpPercap, lifeExp, colour = continent)) +
  scale_x_log10() +
  geom_point() +
  stat_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Life expectancy on the log of GDP per capita, using a linear model.
ggplot(gapminder, aes(gdpPercap, lifeExp, colour = continent)) +
  scale_x_log10() +
  geom_point() +
  stat_smooth(method = "lm")

# You can make this graph even more confusing / advanced

# This is going to be bad - point size now increases with population.
ggplot(gapminder, aes(gdpPercap, lifeExp, colour = continent, size = pop)) +
  scale_x_log10() +
  geom_point() +
  stat_smooth(method = "lm")

# Asia only again: GDP per capita over time, one colour per country.
# Faceting: facet_grid() = two variables, facet_wrap() = only one.
gapminder %>%
  filter(continent == "Asia") %>%
  ggplot(aes(year, gdpPercap, colour = country)) +
  facet_wrap(~continent) +
  geom_point()

# Changing the mapping

# Mean GDP per capita and mean life expectancy per European country, with
# country mapped to the point shape.
gapminder %>%
  filter(continent == "Europe") %>%
  group_by(country) %>%
  summarise(
    gdpPercap = mean(gdpPercap),
    lifeExp = mean(lifeExp)
  ) %>%
  ggplot(aes(gdpPercap, lifeExp, shape = country)) +
  scale_x_log10() +
  geom_point() +
  stat_smooth(method = "lm")
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 30. Consider specifying shapes manually if you must have them.
## Warning: Removed 24 rows containing missing values (geom_point).

# Not really useful, but we can see that there are multiple shapes though

Back to square one

# Continent mapped to fill - cool to see the colours.
ggplot(gapminder, aes(gdpPercap, lifeExp, fill = continent)) +
  scale_x_log10() +
  geom_point() +
  stat_smooth(method = "lm")

# To get rid of the points, omit geom_point().
ggplot(gapminder, aes(gdpPercap, lifeExp, colour = continent)) +
  scale_x_log10() +
  stat_smooth(method = "lm")

# Dashed lines: hard to tell which is which though, because it kind of
# makes the line types up

# Line type per continent; the colour can be added in the points layer only.
ggplot(gapminder, aes(gdpPercap, lifeExp, linetype = continent)) +
  scale_x_log10() +
  geom_point(aes(colour = continent)) +
  stat_smooth(method = "lm")

# Getting rid of the lines

# Looks mysterious: alpha can take a fixed value, or (as here, mapped to pop
# inside aes()) it can vary for each point.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  scale_x_log10() +
  geom_point(aes(colour = continent, alpha = pop)) +
  stat_smooth(method = "lm")

# If you move alpha outside of aes() you control the weight for every point
# at once; alpha ranges from 0 to 1. Colour is mapped per layer here.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  scale_x_log10() +
  geom_point(aes(colour = continent), alpha = .5) +
  stat_smooth(aes(colour = continent), method = "lm")

# Added a facet: shows the relationships and breaks the scatter plots out
# into one panel per continent.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  geom_point(alpha = .4) +
  stat_smooth(method = "lm")

# Adding the titles and the axis labels.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  geom_point(alpha = .4) +
  stat_smooth(method = "lm") +
  labs(
    x = "GDP Per Capita",
    y = "Life Expectancy",
    title = "Life Expectancy by GDP Per Capita",
    subtitle = "Faceted by Continet"
  )

# Getting weird again

# The faceted, titled plot once more, this time flipping the axes.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  coord_flip() +
  geom_point(alpha = .4) +
  stat_smooth(method = "lm") +
  labs(
    x = "GDP Per Capita",
    y = "Life Expectancy",
    title = "Life Expectancy by GDP Per Capita",
    subtitle = "Faceted by Continet"
  )

# Box plots of life expectancy per continent; if we flip them they are a
# little easier to read.
ggplot(gapminder, aes(continent, lifeExp)) +
  geom_boxplot()

# Yes, this is easier to read for sure.
ggplot(gapminder, aes(continent, lifeExp)) +
  coord_flip() +
  geom_boxplot()

# theme_minimal() takes out the grey background; classic would remove all of
# the lines.
ggplot(gapminder, aes(continent, lifeExp)) +
  coord_flip() +
  theme_minimal() +
  geom_boxplot()

# Points layer added before the box plots: a bit much, but it lets you see a
# little more of the data; we can change this if we want.
ggplot(gapminder, aes(continent, lifeExp)) +
  coord_flip() +
  theme_minimal() +
  geom_point() +
  geom_boxplot()

# Same points-plus-boxplot figure, styled with theme_fivethirtyeight().
# NOTE(review): theme_fivethirtyeight() is presumably from the ggthemes
# package - confirm it is attached.
ggplot(gapminder, aes(continent, lifeExp)) +
  coord_flip() +
  theme_fivethirtyeight() +
  geom_point() +
  geom_boxplot()

# Wes Anderson themes: you can have plots in different themes, colors, etc.


# Per-continent summary of life expectancy: stat_summary() computes the
# median with a min-to-max range, and geom_point() adds the raw observations
# in a later layer. Axes are flipped.
# fun / fun.min / fun.max replace fun.y / fun.ymin / fun.ymax, which are
# deprecated since ggplot2 3.3.0 (so this form needs ggplot2 >= 3.3.0).
gapminder %>%
  ggplot(data = ., mapping = aes(x = continent, y = lifeExp)) +
  stat_summary(
    fun.min = min,
    fun.max = max,
    fun = median,
    alpha = .9
  ) +
  geom_point() +
  coord_flip() +
  theme_fivethirtyeight()

# Another useful option is the position (e.g. identity), so you can have fill, dodge or jitter. If all you had was the min, median and max, you could still plot: you could pass them in directly, or you could create some fake data and plot that if you wanted!